{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
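    "Merges two dill-serialized datasets into a single one. The program indexes of the second dataset are shifted by the number of programs in the first, so they still point at the right entries of the merged program list."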
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "%run ../../utils.ipynb\n",
    "import dill\n",
    "from dataset import *\n",
    "filename1='/data/scratch/mmerouani/data/speedup_dataset_research_batch1001-2500.pkl'\n",
    "filename2='/data/scratch/mmerouani/data/speedup_dataset_research_batch2501-3000.pkl'\n",
    "merged_filename='speedup_dataset_research_batch1001-3000.pkl'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "loading dataset 1\n",
      "loading dataset 2\n"
     ]
    }
   ],
   "source": [
    "# Load both serialized datasets. The context managers close each file even if\n",
    "# dill.load raises, so a failed load does not leak an open file handle.\n",
    "# NOTE: like pickle, dill can execute arbitrary code while loading; only load trusted files.\n",
    "print(\"loading dataset 1\")\n",
    "with open(filename1, 'rb') as f:\n",
    "    dataset_dict1 = dill.load(f)\n",
    "print(\"loading dataset 2\")\n",
    "with open(filename2, 'rb') as f:\n",
    "    dataset_dict2 = dill.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sizes of dataset 1. The program count is the offset applied to dataset 2's\n",
    "# program indexes below.\n",
    "nb_schedules_ds1 = len(dataset_dict1['schedules'])\n",
    "nb_prog_ds1 = len(dataset_dict1['programs'])\n",
    "\n",
    "# Per-schedule fields are joined with `+`, dataset 1 first\n",
    "# (presumably plain lists, so `+` concatenates -- TODO confirm).\n",
    "speedup_array = dataset_dict1['speedup'] + dataset_dict2['speedup']\n",
    "exec_times_array = dataset_dict1['exec_times'] + dataset_dict2['exec_times']\n",
    "schedules_array = dataset_dict1['schedules'] + dataset_dict2['schedules']\n",
    "\n",
    "# Programs are joined the same way, so every program index coming from\n",
    "# dataset 2 is shifted by the number of programs in dataset 1.\n",
    "programs = dataset_dict1['programs'] + dataset_dict2['programs']\n",
    "ds2_program_indexes = [idx + nb_prog_ds1 for idx in dataset_dict2['program_indexes']]\n",
    "program_indexes = dataset_dict1['program_indexes'] + ds2_program_indexes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bundle the merged fields under the same keys that were read from the input\n",
    "# datasets, then serialize the bundle to merged_filename.\n",
    "save = {\n",
    "    'programs': programs,\n",
    "    'program_indexes': program_indexes,\n",
    "    'schedules': schedules_array,\n",
    "    'exec_times': exec_times_array,\n",
    "    'speedup': speedup_array,\n",
    "}\n",
    "# The context manager flushes and closes the file even if dill.dump raises.\n",
    "with open(merged_filename, 'wb') as f:\n",
    "    dill.dump(save, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Checking the result\n"
     ]
    }
   ],
   "source": [
    "# Checking: reload the merged file from disk into `mds`.\n",
    "# The context manager closes the file even if dill.load raises.\n",
    "print(\"Checking the result\")\n",
    "with open(merged_filename, 'rb') as f:\n",
    "    mds = dill.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Done\n"
     ]
    }
   ],
   "source": [
    "# Checking: compare every reloaded entry against the dataset it came from.\n",
    "for i in range(len(mds['schedules'])):\n",
    "    # Each schedule must point at the program it is named after.\n",
    "    # NOTE(review): the [:12] slice presumes program names are exactly 12 characters -- confirm.\n",
    "    assert mds['schedules'][i].name[:12] == mds['programs'][mds['program_indexes'][i]].name, \"error1 at i=\"+str(i)\n",
    "    # The first nb_schedules_ds1 entries are compared with dataset 1, the rest\n",
    "    # with dataset 2 (re-based by nb_schedules_ds1).\n",
    "    if i < nb_schedules_ds1:\n",
    "        source_ds, j = dataset_dict1, i\n",
    "    else:\n",
    "        source_ds, j = dataset_dict2, i - nb_schedules_ds1\n",
    "    # Logical `and` (not bitwise `&`) so the schedule comparison is skipped once the speedup already differs.\n",
    "    assert mds['speedup'][i] == source_ds['speedup'][j] and mds['schedules'][i] == source_ds['schedules'][j], \"error2 at i=\"+str(i)\n",
    "print(\"Done\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
